Installation

  1. Install R
  2. Install RTools if you are on Windows
  3. Install RStudio

For more details, see Software and Package Versions.

Running This Code

  1. Ensure the installation steps above are completed
  2. Download a zip of the code and data here and unzip it
  3. In RStudio, open the src/src.Rproj file
  4. Then, open the src/index.Rmd file
  5. In RStudio:
    • Run all code: Click the Run drop down (top right of the code pane) and click Run All
    • Generate HTML version: Click knit (top left of code pane) and a file will be generated in docs/index.html

Libraries

Install R packages if needed.

# Required packages
required_packages <- c(
    "rmarkdown",
    "bookdown",
    "knitr",
    "tidyverse",
    "purrr",
    "glue",
    "lubridate",
    "scales",
    "patchwork",
    "DiagrammeR",
    "DiagrammeRsvg",
    "webshot2",
    "magick",
    "rsvg",
    "sf",
    "tmap",
    "ggspatial",
    "prettymapr",
    "units"
)

# Try to install packages if not installed
default_options <- options()
tryCatch(
    {
        # Disable interactivity
        options(install.packages.compile.from.source = "always")
        
        # Install package if not installed
        for (package in required_packages) {
            is_package_installed <- require(package, character.only = TRUE)
            if (!is_package_installed) {
                cat(paste0("Installing package: ", package, "\n"))
                install.packages(package)
            } else {
                cat(paste0("Package already installed: ", package, "\n"))
            }
        }
    },
    error = function(cond) {
        stop(cond)
    },
    finally = {
        options(default_options) # reset interactivity
    }
)

Load R libraries.

library(DiagrammeR)
library(ggplot2)
library(ggspatial)
library(glue)
library(lubridate)
library(patchwork)
library(sf)
library(tidyverse)
library(tmap)

Data

Read data from the data folder.

ddesc <- read_csv("../data/data.csv")
ddesc

Vancouver Bikeways

Bikeways data with manually verified (Google Street View/Earth and Web Search) painted lanes and cycle tracks for Vancouver, Canada

# Read data
vancbike_raw <- read_sf("../data/vancouver-bikeways-2024-06-02.geojson")

# Get download date
vancbike_dldate <- ddesc %>% filter(
    file == "vancouver-bikeways-2024-06-02.geojson"
) %>% pull(download_date)

Map

Only the first 1000 records are shown.

tmap_mode("view")
tm_shape(vancbike_raw %>% head(1000)) +
    tm_lines(
        col = "#336699",
        border.col = "white",
        popup.vars = TRUE
    )

Data

  • Columns: 23
  • Rows: 3666
vancbike_raw %>% as_tibble

Dictionary

The data contains the following columns:

vancbike_ddict <- read_csv("../data/vancouver-bikeways-2024-06-02-datadict.csv")
vancbike_ddict

Details

print(vancbike_raw)
## Simple feature collection with 3666 features and 22 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: -123.2238 ymin: 49.19899 xmax: -123.0233 ymax: 49.31428
## Geodetic CRS:  WGS 84
## # A tibble: 3,666 × 23
##    id     street     status road_type road_type_recode install_year install_type
##    <chr>  <chr>      <chr>  <chr>     <chr>                   <dbl> <chr>       
##  1 294725 Highbury   Active Resident… Local                    2006 Local Street
##  2 294726 Highbury   Active Resident… Local                    2006 Local Street
##  3 294731 W 8th Ave  Active Resident… Local                    1994 Local Street
##  4 294732 W 8th Ave  Active Resident… Local                    1994 Local Street
##  5 294733 Off Street Active Lane      Local                    2003 Protected B…
##  6 294736 W 5th Ave  Active Resident… Local                    2009 Local Street
##  7 294737 W 8th Ave  Active Resident… Local                    1994 Local Street
##  8 294738 W 7th Ave  Active Resident… Local                    1994 Local Street
##  9 294739 W 7th Ave  Active Resident… Local                    1994 Local Street
## 10 294742 W 7th Ave  Active Resident… Local                    1994 Local Street
## # ℹ 3,656 more rows
## # ℹ 16 more variables: verify_install_year <dbl>, verify_install_date <chr>,
## #   verify_install_type <chr>, verify_install_comment <chr>,
## #   verify_upgrade1_year <dbl>, verify_upgrade1_date <chr>,
## #   verify_upgrade1_type <chr>, verify_upgrade1_comment <chr>,
## #   verify_upgrade2_year <dbl>, verify_upgrade2_date <chr>,
## #   verify_upgrade2_type <chr>, verify_upgrade2_comment <chr>, …

Calgary Bikeways

Bikeways data with manually verified (Google Street View/Earth and Web Search) painted lanes and cycle tracks for Calgary, Canada

# Read data
calgbike_raw <- read_sf("../data/calgary-bikeways-2024-06-05.geojson")

# Get download date
calgbike_dldate <- ddesc %>% filter(
    file == "calgary-bikeways-2024-06-05.geojson"
) %>% pull(download_date)

Map

Only the first 1000 records are shown.

tmap_mode("view")
tm_shape(calgbike_raw %>% head(1000)) +
    tm_lines(
        col = "#336699",
        border.col = "white",
        popup.vars = TRUE
    )

Data

  • Columns: 22
  • Rows: 4169
calgbike_raw %>% as_tibble

Dictionary

The data contains the following columns:

calgbike_ddict <- read_csv("../data/calgary-bikeways-2024-06-05-datadict.csv")
calgbike_ddict

Details

print(calgbike_raw)
## Simple feature collection with 4169 features and 21 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: -114.269 ymin: 50.89762 xmax: -113.9302 ymax: 51.17778
## Geodetic CRS:  WGS 84
## # A tibble: 4,169 × 22
##    id    street status   road_type road_type_recode install_year install_type   
##    <chr> <chr>  <chr>    <chr>     <chr>                   <dbl> <chr>          
##  1 1     <NA>   EXISTING <NA>      <NA>                     2003 On-Street Bike…
##  2 2     <NA>   EXISTING <NA>      <NA>                     2009 On-Street Bike…
##  3 3     <NA>   EXISTING <NA>      <NA>                     2009 On-Street Bike…
##  4 4     <NA>   EXISTING <NA>      <NA>                     1999 On-Street Bike…
##  5 5     <NA>   EXISTING <NA>      <NA>                     1999 On-Street Bike…
##  6 6     <NA>   EXISTING <NA>      <NA>                     2005 On-Street Bike…
##  7 7     <NA>   EXISTING <NA>      <NA>                     1999 On-Street Bike…
##  8 8     <NA>   EXISTING <NA>      <NA>                     1999 On-Street Bike…
##  9 9     <NA>   EXISTING <NA>      <NA>                     1999 On-Street Bike…
## 10 10    <NA>   INACTIVE <NA>      <NA>                       NA DECOMMISSIONED 
## # ℹ 4,159 more rows
## # ℹ 15 more variables: verify_install_year <dbl>, verify_install_date <chr>,
## #   verify_install_type <chr>, verify_install_comment <chr>,
## #   verify_upgrade1_year <dbl>, verify_upgrade1_date <chr>,
## #   verify_upgrade1_type <chr>, verify_upgrade1_comment <chr>,
## #   verify_upgrade2_year <dbl>, verify_upgrade2_date <chr>,
## #   verify_upgrade2_type <chr>, verify_upgrade2_comment <chr>, …

Toronto Bikeways

Bikeways data with manually verified (Google Street View/Earth and Web Search) painted lanes and cycle tracks for Toronto, Canada

# Read data
toronbike_raw <- read_sf("../data/toronto-bikeways-2024-06-02.geojson")

# Get download date
toronbike_dldate <- ddesc %>% filter(
    file == "toronto-bikeways-2024-06-02.geojson"
) %>% pull(download_date)

Map

Only the first 1000 records are shown.

tmap_mode("view")
tm_shape(toronbike_raw %>% head(1000)) +
    tm_lines(
        col = "#336699",
        border.col = "white",
        popup.vars = TRUE
    )

Data

  • Columns: 23
  • Rows: 1323
toronbike_raw %>% as_tibble

Dictionary

The data contains the following columns:

toronbike_ddict <- read_csv("../data/toronto-bikeways-2024-06-02-datadict.csv")
toronbike_ddict

Details

print(toronbike_raw)
## Simple feature collection with 1323 features and 22 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: -79.63039 ymin: 43.58221 xmax: -79.11803 ymax: 43.85546
## Geodetic CRS:  WGS 84
## # A tibble: 1,323 × 23
##    id    street    street_from street_to road_type road_type_recode install_year
##    <chr> <chr>     <chr>       <chr>     <chr>     <chr>                   <dbl>
##  1 8     Bloor St… Parliament… Castle F… Major Ar… Arterial                 2001
##  2 17    Lake Sho… Humber Bay… Humber B… Major Ar… Arterial                 2001
##  3 18    Lake Sho… 37 M E Fle… Humber B… Major Ar… Arterial                 2001
##  4 19    Lake Sho… 50.7 M E L… 37 M E F… Major Ar… Arterial                 2001
##  5 38    Queens Q… Martin Goo… Bathurst… Collector Collector                2001
##  6 39    Davenpor… Cottingham… Macphers… Minor Ar… Arterial                 2001
##  7 40    Elizabet… College St  Gerrard … Collector Collector                2001
##  8 41    Gerrard … Yonge St    Church St Minor Ar… Arterial                 2001
##  9 42    Macphers… Davenport … Poplar P… Collector Collector                2001
## 10 43    Lake Sho… Marine Par… Palace P… Major Ar… Arterial                 2001
## # ℹ 1,313 more rows
## # ℹ 16 more variables: install_type <chr>, verify_install_year <dbl>,
## #   verify_install_date <chr>, verify_install_type <chr>,
## #   verify_install_comment <chr>, verify_upgrade1_year <dbl>,
## #   verify_upgrade1_date <chr>, verify_upgrade1_type <chr>,
## #   verify_upgrade1_comment <chr>, verify_upgrade2_year <dbl>,
## #   verify_upgrade2_date <chr>, verify_upgrade2_type <chr>, …

Verified Dates

The verification dates manually entered for the cycling infrastructure data were unstructured and do not follow a structured format suitable for analysis.

Nevan Opp went through the dates in Google Sheets, interpreted them, and formatted them into structured dates, while Richard Wen updated and fixed errors as needed.

These structured dates can then be joined back to the unstructured dates to include higher resolution temporal data to the cycling infrastructure install and upgrade dates.

# Read data
vdates_raw <- read_csv("../data/verify-dates-2024-06-12.csv")

# Get download date
vdates_dldate <- ddesc %>% filter(
    file == "verify-dates-2024-06-12.csv"
) %>% pull(download_date)

Data

  • Columns: 8
  • Rows: 298
vdates_raw

Dictionary

The data contains the following columns:

vdates_ddict <- read_csv("../data/verify-dates-2024-06-12-datadict.csv")
vdates_ddict

Files

The data files are available below:

Toronto KSI

KSI (1980-2024) data from the City of Toronto (David McElroy ) for Toronto, Ontario

  • Download Link: NA
  • Download Date: June 13, 2024
  • Data Updated: June 13, 2024
  • Notes: Documentation available at docs/tps-data-doc-2024-01-10.pdf (pages 12-14), Open Government Licence – Ontario, Retrieved from David as previous data from Toronto Police Services were missing about 26% of the ACCNUM
# Read data
toronksi_raw <- read_sf(
    "../data/toronto-ksi-2024-06-13.csv",
    options = c(
        "X_POSSIBLE_NAMES=LONGITUDE",
        "Y_POSSIBLE_NAMES=LATITUDE"
    ),
    crs = 4326
)

# Get download date
toronksi_dldate <- ddesc %>% filter(
    file == "toronto-ksi-2024-06-13.csv"
) %>% pull(download_date)

Map

Note: Due to the large number of records, only the latest year of 2024 is displayed (n = 239).

tmap_mode("view")
tm_shape(toronksi_raw %>% filter(year(DATE) == max(year(DATE)))) +
    tm_dots(
        col = "ACCLASS",
        clustering = TRUE,
        popup.vars = TRUE
    )

Data

  • Columns: 51
  • Rows: 67462
toronksi_raw %>% as_tibble()

Dictionary

The data contains the following columns:

ksi_ddict <- read_csv("../data/toronto-ksi-2024-06-13-datadict.csv")
ksi_ddict

Details

print(toronksi_raw)
## Simple feature collection with 67462 features and 50 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -79.63925 ymin: 43.58964 xmax: -79.12297 ymax: 43.85544
## Geodetic CRS:  WGS 84
## # A tibble: 67,462 × 51
##    INDEX   ACCNUM YEAR  DATE    TIME  STREET1 STREET2 OFFSET ROAD_CLASS DISTRICT
##  * <chr>   <chr>  <chr> <chr>   <chr> <chr>   <chr>   <chr>  <chr>      <chr>   
##  1 3738192 999991 1980  1980-0… 330   MARKHA… MILNER… ""     Major Art… Scarbor…
##  2 3738193 999991 1980  1980-0… 330   MARKHA… MILNER… ""     Major Art… Scarbor…
##  3 75100   30     1985  1985-0… 300   ALBION… EDGEBR… ""     Major Art… Etobico…
##  4 75101   30     1985  1985-0… 300   ALBION… EDGEBR… ""     Major Art… Etobico…
##  5 75102   30     1985  1985-0… 300   ALBION… EDGEBR… ""     Major Art… Etobico…
##  6 75863   1094   1985  1985-0… 1100  STEELE… STEINW… ""     Major Art… Etobico…
##  7 75864   1094   1985  1985-0… 1100  STEELE… STEINW… ""     Major Art… Etobico…
##  8 75865   1094   1985  1985-0… 1100  STEELE… STEINW… ""     Major Art… Etobico…
##  9 75866   1094   1985  1985-0… 1100  STEELE… STEINW… ""     Major Art… Etobico…
## 10 75154   143    1985  1985-0… 2000  MORNIN… SHEPPA… ""     Major Art… Scarbor…
## # ℹ 67,452 more rows
## # ℹ 41 more variables: WARDNUM <chr>, DIVISION <chr>, LATITUDE <dbl>,
## #   LONGITUDE <dbl>, LOCCOORD <chr>, ACCLOC <chr>, TRAFFCTL <chr>,
## #   VISIBILITY <chr>, LIGHT <chr>, RDSFCOND <chr>, ACCLASS <chr>,
## #   IMPACTYPE <chr>, INVTYPE <chr>, INVAGE <chr>, INJURY <chr>, FATAL_NO <chr>,
## #   INITDIR <chr>, VEHTYPE <chr>, MANOEUVER <chr>, DRIVACT <chr>,
## #   DRIVCOND <chr>, PEDTYPE <chr>, PEDACT <chr>, PEDCOND <chr>, …

Files

The data files are available below:

Cleaning

Combine Bikeways

Combine bikeway data across all cities.

# List of city bikeway data
bike_list <- list(
    vancouver = vancbike_raw,
    calgary = calgbike_raw %>%
        mutate(no_verify_install_type = NA),
    toronto = toronbike_raw %>%
        mutate(no_verify_install_type = NA)
)

# Get common columns across all city bikeways
bike_cols <- bike_list %>%
    map(colnames) %>%
    reduce(intersect)

# Combine bikeway data across cities
bike_raw <- names(bike_list) %>%
    map(function(city) {
        bike_list[[city]] %>%
            select(
                all_of(bike_cols)
            ) %>%
            mutate(
                city = factor(city, levels = names(bike_list)),
                .before = 1
            )
    }) %>%
    reduce(add_row)

# Display combined bikeway data
bike_raw %>% as_tibble

Combine KSI

Combine KSI data across cities and standardize columns.

Note: Only Toronto KSI is included for now.

ksi <- toronksi_raw %>%
    mutate(
        ksi_city = "toronto",
        ksi_id = INDEX,
        ksi_date = DATE
    )

Pivot Bikeway Long

Pivot bikeways data to long format, where each record represents an installation or upgrade.

Also adds the following columns:

  • _pivot_event: one of install, upgrade1, or upgrade2
  • _pivot_year: the year of the install or upgrade from verify_install_year, verify_upgrade1_year, or verify_upgrade2_year
  • _pivot_infra: one of LSB, PL, BUF, or PBL
# Pivot to long format on installs and upgrades
bike <- bike_raw %>%
    pivot_longer(
        cols = c(
            verify_install_date,
            verify_upgrade1_date,
            verify_upgrade2_date
        ),
        names_to = "_pivot_column",
        values_to = "_pivot_value"
    ) %>%
    mutate(
        `_pivot_event` = case_when(
            str_starts(`_pivot_column`, "verify_install") ~ "install",
            str_starts(`_pivot_column`, "verify_upgrade1") ~ "upgrade1",
            str_starts(`_pivot_column`, "verify_upgrade2") ~ "upgrade2"
        ),
        `_pivot_year` = case_when(
            `_pivot_event` == "install" ~ verify_install_year,
            `_pivot_event` == "upgrade1" ~ verify_upgrade1_year,
            `_pivot_event` == "upgrade2" ~ verify_upgrade2_year
        ),
        `_pivot_type` = case_when(
            str_starts(`_pivot_column`, "verify_install") ~ verify_install_type,
            str_starts(`_pivot_column`, "verify_upgrade1") ~ verify_upgrade1_type,
            str_starts(`_pivot_column`, "verify_upgrade2") ~ verify_upgrade2_type
        ),
    )

# Display pivot columns
bike %>%
    as_tibble %>%
    select(
        `_pivot_event`,
        `_pivot_year`,
        `_pivot_column`,
        `_pivot_value`,
        everything()
    ) %>%
    arrange(`_pivot_year`)

Add Bikeway Dates

Join ambiguous verified install/upgrade dates (e.g. Jan 1/2022, 2022/02, Fall 2020) to manually cleaned dates with structured time units (e.g. days, months, quarters, semesters, ranges) and date formats (e.g. 2022-01-01, 2022-02-01).

The following cleaned structured date columns will be added to bikeways:

  • verify_date_raw_count
  • verify_date_type
  • verify_date
  • verify_date_start
  • verify_date_end
  • verify_date_prepost
  • verify_date_notes
# Join clean dates to include structured date formats
bike <- bike %>%
    left_join(
        vdates_raw,
        by = join_by(`_pivot_value` == verify_date_raw)
    )

# Display clean dates cols
bike %>%
    as_tibble %>%
    select(all_of(
        colnames(vdates_raw) %>%
            .[. != "verify_date_raw"]
    )) %>%
    arrange(verify_date_type)

Add Time Units

Add the following time unit columns for installs/upgrades in bikeways and for each ksi record:

  • _time_month: which month (1, 2, 3 … 12) a bikeway had a verified installation/upgrade or ksi record
  • _time_quarter: which quarter (1, 2, 3, 4) of the year a bikeway had a verified installation/upgrade or ksi record
  • _time_third: which third (1, 2, 3) of the year a bikeway had a verified installation/upgrade or ksi record
  • _time_half: which half (1, 2) of the year a bikeway had a verified installation/upgrade or ksi record
  • _time_semester: which semester (1, 2) of the year a bikeway had a verified installation/upgrade or ksi record, where:
    • 1: represents November to April of next year
    • 2: represents May to October of next year
  • _time_year: standardized year column that bikeway had verified installation/upgrade or ksi record
  • _time_..._group: grouped columns of the above defining the time units in textual format
  • KSI records will follow the same patterns above except prefixed with _ksi as well

Note: Date ranges that fall between months, quarters, thirds, or halves of the year were excluded from being classified as any of the time units (e.g. March 31 to April 15 will be excluded as it does not fall within either the 1st or 2nd quarter of the year).

#' Add Time Units to Data Frame
#'
#' This function takes a data frame and adds new columns representing various time units such as month, quarter, third, half, and semester. These columns are derived from a date column or if the time unit is more than day, then additiona start and end date columns
#'
#' @param df A data frame that contains the columns `verify_date`, `verify_date_type`, `verify_date_start`, and `verify_date_end`.
#' @param date_col Name of the date column to use
#' @param start_col Name of the start date column to use
#' @param end_col Name of the end date column to use
#' @param type_col Name of the date type column to use (e.g. month, day, year, etc)
#'
#' @return A data frame with additional columns:
#' \describe{
#'   \item{`_time_month`}{Numeric representation of the month (1-12).}
#'   \item{`_time_quarter`}{Quarter of the year (1-4).}
#'   \item{`_time_third`}{Third of the year (1-3).}
#'   \item{`_time_half`}{Half of the year (1-2).}
#'   \item{`_time_semester`}{Custom semester (1 for May-Oct, 2 for Nov-Apr).}
#' }
#'
#' @examples
#' \dontrun{
#' df <- data.frame(
#'   verify_date = as.Date(c("2021-01-15", "2021-06-20", "2021-09-10")),
#'   verify_date_type = c("day", "month", "day"),
#'   verify_date_start = as.Date(c("2021-01-01", "2021-06-01", "2021-09-01")),
#'   verify_date_end = as.Date(c("2021-01-31", "2021-06-30", "2021-09-30"))
#' )
#' result <- add_time_units(df)
#' print(result)
#' }
#'
#' @import dplyr
#' @importFrom lubridate month year
#' @export
add_time_units <- function(
    df,
    date_col = "verify_date",
    start_col = "verify_date_start",
    end_col = "verify_date_end",
    type_col = "verify_date_type",
    year_col = "_pivot_year"
) {
    df %>%
        rename( # rename req cols
            `_date` := !!sym(date_col),
            `_start` := !!sym(start_col),
            `_end` := !!sym(end_col),
            `_type` := !!sym(type_col),
            `_year` := !!sym(year_col)
        ) %>%
        mutate( # add time units
            
            # Months (monthly)
            `_time_month` = case_when(
                `_type` %in% c("day", "month") ~ month(`_date`, label = T, abbr = F)
            ),
            `_time_month` = as.numeric(`_time_month`),
            
            # Quarters (quarterly)
            `_time_quarter` = case_when(
                month(`_date`) %in% 1:3 |
                (
                    month(`_start`) %in% 1:3 &
                    month(`_end`) %in% 1:3 &
                    year(`_start`) == year(`_end`)
                ) ~ 1,
                month(`_date`) %in% 4:6 |
                (
                    month(`_start`) %in% 4:6 &
                    month(`_end`) %in% 4:6 &
                    year(`_start`) == year(`_end`)
                ) ~ 2,
                month(`_date`) %in% 7:9 |
                (
                    month(`_start`) %in% 7:9 &
                    month(`_end`) %in% 7:9 &
                    year(`_start`) == year(`_end`)
                ) ~ 3,
                month(`_date`) %in% 10:12 |
                (
                    month(`_start`) %in% 10:12 &
                    month(`_end`) %in% 10:12 &
                    year(`_start`) == year(`_end`)
                ) ~ 4
            ),
            
            # Thirds (triyearly)
            `_time_third` = case_when(
                month(`_date`) %in% 1:4 |
                (
                    month(`_start`) %in% 1:4 &
                    month(`_end`) %in% 1:4 &
                    year(`_start`) == year(`_end`)
                ) ~ 1, # Fall
                month(`_date`) %in% 5:8 |
                (
                    month(`_start`) %in% 5:8 &
                    month(`_end`) %in% 5:8 &
                    year(`_start`) == year(`_end`)
                ) ~ 2, # Winter
                month(`_date`) %in% 9:12 |
                (
                    month(`_start`) %in% 9:12 &
                    month(`_end`) %in% 9:12 &
                    year(`_start`) == year(`_end`)
                ) ~ 3 # Spring/Summer
            ),
            
            # Halves (biyearly)
            `_time_half` = case_when(
                month(`_date`) %in% 1:6 |
                (
                    month(`_start`) %in% 1:6 &
                    month(`_end`) %in% 1:6 &
                    year(`_start`) == year(`_end`)
                ) ~ 1,
                month(`_date`) %in% 7:12 |
                (
                    month(`_start`) %in% 7:12 &
                    month(`_end`) %in% 7:12 &
                    year(`_start`) == year(`_end`)
                ) ~ 2
            ),
            
            # Semester (custom range)
            `_time_semester` = case_when(
                month(`_date`) %in% c(11:12, 1:4) |
                (
                    month(`_start`) %in% c(11:12, 1:4) &
                    ( # Nov to Dec of this year
                        month(`_end`) %in% 11:12 &
                        year(`_end`) == year(`_start`)
                    ) |
                    ( # Jan to Apr of this or next year
                        month(`_end`) %in% 1:4 &
                        year(`_end`) == year(`_start`) |
                        year(`_end`) == (year(`_start`) + 1)
                    )
                ) ~ 2, # Nov to Apr of next year
                month(`_date`) %in% 5:10 |
                (
                    month(`_start`) %in% 5:10 &
                    month(`_end`) %in% 5:10 &
                    year(`_start`) == year(`_end`)
                ) ~ 1 # May to Oct
            ),
            
            # Year
            `_time_year` = `_year`
            
        ) %>%
        mutate( # add time groups that include the year
            
            # Month group
            `_time_month_group` = if_else(
                !is.na(`_time_month`),
                glue("{`_year`}_{`_time_month`}"),
                NA
            ),
            
            # Quarter group
            `_time_quarter_group` = if_else(
                !is.na(`_time_quarter`),
                glue("{`_year`}_{`_time_quarter`}"),
                NA
            ),
            
            # Third group
            `_time_third_group` = if_else(
                !is.na(`_time_third`),
                glue("{`_year`}_{`_time_third`}"),
                NA
            ),
            
            # Half group
            `_time_half_group` = if_else(
                !is.na(`_time_half`),
                glue("{`_year`}_{`_time_half`}"),
                NA
            ),
            
            # Semester group
            `_time_semester_group` = if_else(
                !is.na(`_time_semester`),
                glue("{`_year`}_{`_time_semester`}"),
                NA
            ),
            
            # Year group
            `_time_year_group` = if_else(
                !is.na(`_time_year`),
                glue("{`_year`}"),
                NA
            )
            
        ) %>%
        rename( # rename back to orig
            !!date_col := `_date`,
            !!start_col := `_start`,
            !!end_col := `_end`,
            !!type_col := `_type`,
            !!year_col := `_year`
        )
}

# Add time unit columns using func for bikeways
bike <- bike %>% add_time_units

# Add time unit columns using func for ksi
ksi <- ksi %>%
    mutate( # add cols to standardize func params
        verify_date = ksi_date,
        verify_date_start = NA,
        verify_date_end = NA,
        verify_date_type = "day",
        `_pivot_year` = year(ksi_date)
    ) %>%
    add_time_units %>%
    rename_with(
        ~ paste0("_ksi", .x),
        starts_with("_time_")
    ) %>%
    select( # remove no longer needed cols
        -verify_date,
        -verify_date_start,
        -verify_date_end,
        -verify_date_type,
        -`_pivot_year`
    )

Bikeways

# Display time unit cols for bikeways
bike %>%
    as_tibble %>%
    select(
        verify_date_type,
        verify_date,
        verify_date_start,
        verify_date_end,
        starts_with("_")
    ) %>%
    arrange(`_pivot_value`)

KSI

# Display time unit cols for ksi
ksi %>%
    as_tibble %>%
    select(
        ksi_date,
        starts_with("_")
    ) %>%
    arrange(year(ksi_date))

Filter Verified Post-2011

Filter for bikeways and KSI with a verified installation or upgrade after 2011.

# Filter bikeways for post2011
bike <- bike %>%
    filter(`_pivot_year` > 2011)

# Filter ksi for post2011
ksi <- ksi %>%
    filter(year(ksi_date) > 2011)

Bikeways

# Display filtered rows
bike %>%
    as_tibble %>%
    select(
        `_pivot_year`,
        `_pivot_event`,
        verify_install_year,
        verify_upgrade1_year,
        verify_upgrade2_year
    ) %>%
    arrange(`_pivot_year`)

KSI

# Display filtered rows
ksi %>%
    as_tibble %>%
    select(ksi_date) %>%
    arrange(year(ksi_date))

Determine Unit of Time

For bikeways, determine the temporal resolution (unit of time) finer than yearly based on the amount of data available per time unit (sorted from the highest resolution time unit to the lowest resolution time unit):

  • Month (high): refers to verified post-2011 installs and upgrades occurring approximately in a month (1/12) of a year
  • Quarter: refers to verified post-2011 installs and upgrades occurring approximately in a quarter (1/4) of a year
  • Third: refers to verified post-2011 installs and upgrades occurring approximately in a third (1/3) of a year
  • Half (low): refers to verified post-2011 installs and upgrades occurring approximately in a half (1/2) of a year
  • Semester (low): refers to verified post-2011 installs and upgrades occurring approximately in a half (1/2) of a year with the first half (1) being November (same year) to April (next year) and the second half being May to October (next year)
# Prepare plot data
unit_data <- bike %>%
    as_tibble %>%
    group_by(city) %>%
    summarize( # calc installs/upgrades for each time unit
        Month = sum(!is.na(`_time_month`)),
        Quarter = sum(!is.na(`_time_quarter`)),
        Third = sum(!is.na(`_time_third`)),
        Half = sum(!is.na(`_time_half`)),
        Semester = sum(!is.na(`_time_semester`))
    ) %>%
    pivot_longer(
        cols = -city,
        names_to = "type",
        values_to = "n"
    ) %>%
    mutate( # zero as NA
        n = if_else(n == 0, NA, n)
    ) %>%
    left_join( # Add city totals
        bike %>%
            as_tibble %>%
            group_by(city) %>%
            count(name = "total"),
        by = "city"
    ) %>%
    ungroup %>%
    mutate( # calc percentages and add labels
        type = factor(type, levels = c("Month", "Quarter", "Third", "Half", "Semester")),
        city = factor(str_to_title(city), levels = c("Vancouver", "Calgary", "Toronto")),
        perc = n / total * 100,
        perc_label = glue(
            "{str_sub(city, end = 1)}: {round(perc, 2)}%\n",
            "(n={format(n, big.mark = ',', scientific = F)})"
        )
    ) %>%
    group_by(type) %>% # adjust overlapping labels
    arrange(desc(perc)) %>% 
    mutate( # detect overlap labels and shift down and up
        perc_label_y = case_when(
            lag(perc) - perc < 2 ~ perc - 1.5,
            lead(perc) - perc > -2 ~ perc + 1.5,
            .default = perc
        )
    ) %>%
    group_by(city) %>%
    mutate( # add city labels at end of lines
        city_label = if_else(
            type == "Semester",
            glue(
                "{city}\n",
                "(n={format(total, big.mark = ',', scientific = F)})"
            ),
            NA
        )
    ) %>%
    ungroup %>%
    arrange(city, type)

# Get total
unit_total <- sum(unit_data$total %>% unique, na.rm = T)
unit_total_label <- format(unit_total, big.mark = ",", scientific = F)

# Plot line
unit_plot <- unit_data %>%
    ggplot(aes(
        x = type,
        y = perc,
        group = city,
        color = city,
        label = perc_label
    )) +
    geom_line(
        alpha = 0.8,
        linewidth = 1.5
    ) +
    geom_label(
        aes(y = perc_label_y),
        size = 2,
        show.legend = F,
        label.padding = unit(0.5, "lines")
    ) +
    geom_text(
        aes(
            label = city_label,
            y = perc_label_y
        ),
        size = 2.25,
        show.legend = F,
        hjust = 0,
        nudge_x = 0.25
    ) +
    scale_y_continuous(labels = function(x) paste0(x, "%")) +
    scale_x_discrete(
        limits = levels(unit_data$type),
        position = "top"
    ) +
    labs(
        title = glue(
            "% of Verified Post-2011 Installs & Upgrades by Unit of Time\n",
            "(n={unit_total_label})"
        ),
        x = NULL,
        y = NULL,
        color = "City",
        label = NULL,
        group = NULL
    ) +
    theme_minimal() +
    theme(
        legend.position = "none",
        plot.title = element_text(hjust = 0.5)
    )
unit_plot

# Select time unit
tunit = "third"

Inspect Missing Semesters

Display a map of bikeways for each city with missing semesters.

  • Green: represents installs
  • `Orange``: represents 1st upgrades
  • Red: represents 2nd upgrades
# Find bikeways with missing semesters
bike_nosemester <- bike %>% filter(is.na(`_time_semester`)) %>%
    mutate(geometry_wkb = st_as_text(geometry)) %>%
    select(!starts_with("_ksi")) %>%
    select(id, city, `_pivot_event`, `_time_semester`, everything())

# Save missing semesters
bike_nosemester %>%
    select(-geometry) %>%
    write_sf("../data/archive/bikeways-nosemester-2024-06-30.csv", na = "", append = F)

# Remove long values
bike_nosemester <- bike_nosemester %>%
    select(!ends_with("_comment"), -geometry_wkb)

# Function to display map for each city
map_nosemester <- function(city_name, df = bike_nosemester) {
    
    # Filter for city data
    map_data <- df %>% filter(city == city_name)
    
    # Plot interactive map
    tmap_mode("view")
    out <- tm_basemap("CartoDB.DarkMatter") +
        tm_shape(
            map_data %>%
                filter(`_pivot_event` == "install"),
            name = "Install"
        ) +
        tm_lines(
            col = "green",
            popup.vars = T
        ) +
        tm_shape(
            map_data %>%
                filter(`_pivot_event` == "upgrade1"),
            name = "1st Upgrade"
        ) +
        tm_lines(
            col = "orange",
            popup.vars = T
        ) +
        tm_shape(
            map_data %>%
                filter(`_pivot_event` == "upgrade2"),
            name = "2nd Upgrade"
        ) +
        tm_lines(
            col = "red",
            popup.vars = T
        )
    return(out)
}

Vancouver

map_nosemester("vancouver")

Toronto

map_nosemester("toronto")

Calgary

map_nosemester("calgary")

Spatial Join KSI

Spatially join KSI to nearest bikeways.

Each KSI point is joined to a bikeway if they are within 25 meters of the nearest bikeway

Note: if there is overlapping of bikeways, the same KSI can be assigned to multiple bikeways.

# Set cache file for spatiotemp join
sjoin_date <- max(c(
    vancbike_dldate,
    calgbike_dldate,
    toronbike_dldate,
    toronksi_dldate
))
sjoin_file <- glue("../data/cache/sjoin-{sjoin_date}.csv")

# Run spatial join if not cached
if (!file.exists(sjoin_file)) {
    
    # Buffer bikeways and keep ids
    bike_buff25 <- bike %>%
        select(city, id) %>%
        st_buffer(25)
    
    # Spatial join bikeways with ksi keeping ids
    sjoin <- bike_buff25 %>%
        st_join(ksi %>% select(ksi_id)) %>%
        as_tibble %>%
        select(-geometry)
    
    # Cache spatial joined bike and ksi ids
    sjoin %>% write_csv(sjoin_file)
    
} else {
    
    # Read spatial joined bike and ksi ids from cache
    sjoin <- read_csv(sjoin_file)
    
}

Count KSI Per Unit of Time

For each bikeway and event type (install, upgrade 1, upgrade 2), count KSI spatially joined to bikeways per unit of time across years.

This creates columns in the format of _ksi_time_<UNIT>_<YEAR>_<VALUE>, where:

  • <UNIT>: is the unit of time, either month, quarter, half, or semester
  • <YEAR>: is the year for the unit of time
  • <VALUE>: is the number representing the portion of the time range for the unit of time with month being 1-12, quarter being 1-4, third being 1-3, half being 1-2 and semester being 1-2 (this is omitted if time unit is year)
# Count ksi spatially joined to bike
for (tunit in c("month", "third", "quarter", "half", "semester", "year")) {
    
    # Count ksi by unit of time for each bike event
    sjoin_count <- sjoin %>%
        mutate(
            city = as.character(city),
            id = as.character(id),
            ksi_id = as.character(ksi_id)
        ) %>%
        left_join(bike, by = c("city", "id")) %>%
        left_join(ksi, by = "ksi_id") %>%
        group_by(city, id, `_pivot_event`, across(glue("_ksi_time_{tunit}_group"))) %>%
        count %>%
        ungroup
    
    # Pivot to wider format with each col being a time unit
    sjoin_count_wide <- sjoin_count %>%
        pivot_wider(
            names_from = glue("_ksi_time_{tunit}_group"),
            values_from = "n",
            names_prefix = glue("_ksi_time_{tunit}_")
        ) %>%
        rename_with(
            str_to_lower,
            ends_with("_NA")
        )
    
    # Gen all possible year and time value combinations
    year_min <- min(year(ksi$DATE), na.rm = T)
    year_max <- max(year(ksi$DATE), na.rm = T)
    value_max <- switch(
        tunit,
        month = 12,
        third = 3,
        quarter = 4,
        half = 2,
        semester = 2,
        year = 1,
        stop("Invalid value for tunit")
    )
    tunit_cols <- expand.grid(
        year = year_min:year_max,
        value = 1:value_max,
        unit = tunit
    ) %>%
        mutate(
            column = if_else(
                unit != "year",
                glue("_ksi_time_{tunit}_{year}_{value}"),
                glue("_ksi_time_{tunit}_{year}")
            )
        ) %>%
        arrange(year, value) %>%
        pull(column)
    
    # Add missing year and value combos
    tunit_miss <- tunit_cols[!tunit_cols %in% names(sjoin_count_wide)]
    has_tunit_miss <- length(tunit_miss) > 0
    if (has_tunit_miss) {
        sjoin_count_wide <- sjoin_count_wide %>%
            add_column(!!!setNames(
                rep(0, length(tunit_miss)),
                tunit_miss
            ))
    }
    
    # Sort by time unit and set nas to zeroes
    sjoin_count_wide <- sjoin_count_wide %>%
        select(
            city,
            id,
            `_pivot_event`,
            all_of(tunit_cols)
        ) %>%
        mutate(across(
            all_of(tunit_cols),
            ~replace_na(., 0)
        ))

    # Add counts back into bike
    bike <- bike %>%
        left_join(
            sjoin_count_wide,
            by = c("city", "id", "_pivot_event")
        )
    
}